%load_ext autoreload
%autoreload 2
%matplotlib inline
from util import *
### Directory holding the pickle files ###
# File names are appended to this string by plain concatenation further down,
# which is why it has to finish with a backslash.
path = "D:\\jeries\\output\\TASC_pickles\\"
if not path[-1] == "\\":
    raise ValueError("path must end with a \\. Please try again")

## Names of the pickle files to read
ST, STgraph = r"rawdata.pickle", r"rawdatagraph.pickle"
## Morphological feature columns
MorphoFeatures = [
    'Area',
    'Ellip_Ax_B_X',
    'Ellip_Ax_B_Y',
    'Ellip_Ax_C_X',
    'Ellip_Ax_C_Y',
    'EllipsoidAxisLengthB',
    'EllipsoidAxisLengthC',
    'Ellipticity_oblate',
    'Ellipticity_prolate',
    'Sphericity',
    'Eccentricity',
]

## BPW feature columns (all derived from the velocity profile, judging by their names)
BpwFeatures = [
    'Velocity_Full_Width_Half_Maximum',
    'Velocity_Time_of_Maximum_Height',
    'Velocity_Maximum_Height',
    'Velocity_Ending_Value',
    'Velocity_Ending_Time',
    'Velocity_Starting_Value',
    'Velocity_Starting_Time',
]
# Read both pickled tables and discard every row that has a missing value.
rawdata = pd.read_pickle(path + ST)
rawdatagraph = pd.read_pickle(path + STgraph)
for _table in (rawdata, rawdatagraph):
    _table.dropna(inplace=True)

(expNamesInOrder, expNamesInOrderU, expNamesInOrderUGraph,
 dataAll, dataAllGraph) = splitingExperimentCol(rawdata, rawdatagraph)

# Row count per experiment (shown under the 'Number of Cells' heading), listed
# in the order the experiments first appear in dataAllGraph.
_sorted_counts = dataAllGraph['Experiment'].value_counts().sort_index()
val_c = (
    _sorted_counts[dataAllGraph['Experiment'].unique()]
    .rename_axis('Experiment')
    .reset_index(name='Number of Cells')
)
val_c.index.name = 'Index #'
display(val_c)

# Same table, built without the intermediate sort_index() step.
_raw_counts = dataAllGraph['Experiment'].value_counts()
val_c_1 = (
    _raw_counts[dataAllGraph['Experiment'].unique()]
    .rename_axis('Experiment')
    .reset_index(name='Number of Cells')
)
val_c_1.index.name = 'Index #'
print(expNamesInOrderU)
display(val_c_1)
Let's assume you have 3 wells: [0,1,2] \ Wells 0 and 1 have 500 cells each \ while well 2 has 1000 cells \ You wish to have the same number of cells in each well - that is why we've created the "SAMPLE" variable and its dependencies:
True/False (Boolean) - (whether you want to sample cells or not)
[ ] - an integer list that determines the indices of the wells you would like to sample (for example, in the case presented here it is [2])
an integer - the sampling step, which sets the sample size (if you wish to keep half of the cells: 2, if you wish to keep 1/3 of the cells: 3, etc.)
### Sampling settings (expList and CellSample are only read when SAMPLE is True)
SAMPLE = False  ## Boolean
expList = list(range(16, 24))  ## an integer list: 16, 17, ..., 23
### sample step
CellSample = 2  ## integer
np.arange() is a NumPy function that creates an array of consecutive numbers; see the link below for more information: https://numpy.org/doc/stable/reference/generated/numpy.arange.html
Many times it can help you shorten your numbering\ If you want to collect all the first 16 wells you can just use:\ expListT = np.arange(16)\ instead of:\ expListT = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
expListT can be used to arrange the order in which the wells appear.\ If you have, for example, three wells and your indices are [0,1,2] but you want them to appear as [1,2,0], you can control it by typing the indices in a different order.\ This is available only if you set SAMPLE to False
## Indices of the experiments to analyze (the sampled experiments are excluded)
expListT = list(range(12))
All the files this script creates will get "title" as a prefix to their names
## Experiment name; it is used as the prefix of the output file names ##
title = 'jeries_test'
## Number of clusters
k_cluster = 3
MorphoOut:\ True/False - Boolean variable
MorphoIn:\ True/False - Boolean variable
## True -> analyze only kinetics (the morphological features are dropped), else False ##
MorphoOut = False
## True -> analyze only the morphological features, else False ##
MorphoIn = False
Here you should separate the treatments you have used in your experiments (as you remember, each treatment is 4 characters)\ In the example here the treatments are:\ HGF2,HGF7,DOX1,PHA4,PHA3\ \ You also have combinations of treatments - the program will find them automatically\ \ Treatments that are never combined with each other should be in the same list:\ [['HGF2','HGF7','DOX1'], ['PHA4','PHA3']]\ \ In this example we won't find a well titled: 'HGF2HGF7' or 'HGF2DOX1'\ BUT\ we may find 'HGF7PHA4'\ \ so HGF7 and PHA4 need to be in different lists\ The pattern is:\ combin = [[X1, X2,..],[Y1, Y2,..],[Z1, Z2,..]]\
## Names of the *treatments* that can be combined
## pattern => combin = [[X1, X2,..],[Y1, Y2,..],[Z1, Z2,..]]
combin = [
    ["NNIR"],
    ["METR"],
    ["GABY"],
]
Variables:\ singleTREAT:\ Boolean - True/False\ \ singleCONTROL:\ Boolean - True/False\ \ multipleCL: Boolean - True/False\ \
This part depends on the well list you picked...\ let's assume that we have multiple treatments and multiple cell lines, then:\
singleTREAT = False ## Because we have multiple treatments\
singleCONTROL = True ## Because we have multiple control wells (one for each cell line) BUT they are all the same 'CON' representation\
multipleCL = True ## Because we have multiple cell lines (BT45,MDA2,MCF7)\
## True when a single treatment is analyzed (e.g. across numerous cell lines) ##
singleTREAT = False
## True when a single well is the control ##
singleCONTROL = True
## True when multiple cell lines are being analyzed ##
multipleCL = False
Both are integers... in the case presented here we have 3 cell lines and 8 different treatments (including combinations)\ so we arrange it in that way:\ \ nrows=3 ## each row is a cell line\ ncols=8 ## each column is a treatment\ \ The default values - if you do not wish to change it, let it be:\ nrows=0\ ncols=1
Both are integers...in case of this example..\ \ nColor=3\ nShades=8\ \ The default values - if you do not wish to change it, let it be:\ nColor=0\ nShades=0
Number of colors and shades in the Treatments titled figures (only if needed) - currently do not change it\ \ nColorTreat=0\ nShadesTreat=0\ these are the default values - integers
Number of colors and shades in the y-position titled figures (only if needed) - currently do not change it\ type: integer\ \ nColorLay=3\ nShadesLay=3
type: integer tuple (int,int) figure size in proportion to the number of sub figures in the experiment titled figures\ figsizeEXP = (40,15) \ figure size in proportion to the number of sub figures in the treatments titled figures\ figsizeTREATS = (30, 15)\ figure size in proportion to the number of sub figures in the CellLines titled figures\ figsizeCL= (15,5)
##### graph properties #####
## grid shape (rows, columns) of the experiment titled figures
nrows, ncols = 1, 12
## palette size (colors, shades) of the experiment titled figures
nColor, nShades = 12, 2
## palette size (colors, shades) of the Treatments titled figures (only if needed)
nColorTreat, nShadesTreat = 0, 0
## palette size (colors, shades) of the y position titled figures (only if needed)
nColorLay, nShadesLay = 3, 3
## figure size, in proportion to the number of sub figures, of the experiment titled figures
figsizeEXP = (25, 5)
## figure size, in proportion to the number of sub figures, of the treatments titled figures
figsizeTREATS = (15, 5)
## figure size, in proportion to the number of sub figures, of the CellLines titled figures
figsizeCL = (15, 15)  # (15, 5)
The control name can be either a single entry ['X'] or several ['X', 'Y',...] \ CON = ['CON']\ The cell line can be either a single entry ['X'] or several ['X', 'Y',...] \ CL = ['BT54','MDA2','MCF7']\ The experiment can be either a single entry ['X'] or several ['X', 'Y',...] \
wellCON = ['HA033080917CHR1C02BT54CON0WH00','HA033080917CHR1D02MDA2CON0WH00', 'HA033080917CHR1F02MCF7CON0WH00']
A list of strings, for example ['BT54HGF7'] if you wish 'BT54HGF7' to be the control\ \ controls = [] ## in our case
### chi square tests ###
# Control name(s): a single entry or several ['X', 'Y',...] #
CON = ['NOCO']
# Cell line(s): a single entry or several ['X', 'Y',...] #
CL = ['293T']
# Control experiment (well) name(s): a single entry or several ['X', 'Y',...] #
# Every entry carries its own comma: adjacent string literals without one are
# silently joined by Python into a single long string, which would leave this
# list with one meaningless element instead of twelve well names.
wellCON = [
    'AM001100425CHR2B02293TNNIRNOCOWH00',
    'AM001100425CHR2B03293TNNIRNOCOWH00',
    'AM001100425CHR2B04293TNNIRNOCOWH00',
    'AM001100425CHR2C02293TMETRNNIRNOCOWH00',
    'AM001100425CHR2C03293TMETRNNIRNOCOWH00',
    'AM001100425CHR2C04293TMETRNNIRNOCOWH00',
    'AM001100425CHR2D02293TGABYNNIRNOCOWH00',
    'AM001100425CHR2D03293TGABYNNIRNOCOWH00',
    'AM001100425CHR2D04293TGABYNNIRNOCOWH00',
    'AM001100425CHR2E02293TNNIRMETRGABYNOCOWH00',
    'AM001100425CHR2E03293TNNIRMETRGABYNOCOWH00',
    'AM001100425CHR2E04293TNNIRMETRGABYNOCOWH00',
]
controls = []
# Sorting controls by length (longest first) to avoid problems later
controls.sort(key=len, reverse=True)
## True if hierarchical clustering is needed
## (apply only to a small number of experiments - depends on the number of cells)
HC = False
## True if the AutoEncoder is wanted, else False
AE_model = False
## Name of the AE model, if known. Example: 'd100220h122850'
model_name = 'd100220h122850'
## Choose the index of the experiments you want to analyze
# expList = [52,53,56,57,58,59] ## [0,1,2,..]
# The experiments named in expList are analyzed separately, only when sampling is on.
if SAMPLE:
    _sampled_args = (dataAll, dataAllGraph, expList, expNamesInOrderU)
    expM, Features, dataSpec, dataSpecGraph = analysisExp(*_sampled_args)
    # NOTE(review): indentation was lost in this export; the bare `expM`
    # expression is assumed to belong to the `if` body - confirm against the notebook.
    expM
print(expListT, expNamesInOrderU)
## Change these 2: # CellSample=2 => sample every second cell track, CellSample=3 => sample every third cell track
# SAMPLE=True
# CellSample = 2
FigureNumber = 1
if SAMPLE:
    # Keep every CellSample-th distinct Parent value, in order of first appearance.
    uParent = dataSpecGraph['Parent'].unique()
    keptParents = uParent[::CellSample]
    # A vectorised membership test replaces the former row-by-row `in` lookup
    # against a NumPy array. It also removes the old `0` placeholder, which
    # wrongly kept rows whose real Parent value was 0 even when that parent
    # had not been sampled.
    keepRow = dataSpecGraph['Parent'].isin(keptParents)
    # ParentList is kept for anything that may still read it:
    # the Parent value for kept rows, 0 for discarded ones.
    ParentList = dataSpecGraph['Parent'].where(keepRow, 0).tolist()
    dataSpecGraphP = dataSpecGraph.loc[keepRow].copy()
    expM = list(dataSpecGraphP['Experiment'].unique())
## Choose the index of the experiments you want to analyze
# rest of the experiments
# expListT = np.append(np.append(np.arange(38),(np.arange(40,46))),np.arange(48,52)).tolist() ## [0,1,2,..]
# Copies are passed in so that dataAll / dataAllGraph are not altered by the call.
exp, Features, dataSpec, dataSpecGraph = analysisExp(dataAll.copy(), dataAllGraph.copy(),
                                                     expListT, expNamesInOrderU)

# When sampling is on, the sampled experiments are appended after the regular ones.
if SAMPLE:
    dataSpecGraphN = pd.concat([dataSpecGraph, dataSpecGraphP])
    exp.extend(expM)
else:
    dataSpecGraphN = dataSpecGraph

if MorphoOut:
    Features = Features.drop(MorphoFeatures).copy()
if MorphoIn:
    # Later code slices MorphoFeatures[:-1] to strip this trailing 'Experiment'
    # entry, so it must be added exactly once: an unconditional append would
    # add another copy every time this cell is re-run.
    if 'Experiment' not in MorphoFeatures:
        MorphoFeatures.append('Experiment')
    Features = MorphoFeatures
# Histograms, then KDE curves. The last entry of Features is left out of both.
_plotted_features = Features[:-1]
histogramData(exp, dataSpecGraphN, _plotted_features, FigureNumber)
FigureNumber = FigureNumber + 1
histogramDataKDE(exp, dataSpecGraphN, _plotted_features, FigureNumber,
                 nColor=nColor, nShades=nShades)
FigureNumber = FigureNumber + 1
def histogramDataKDE(exp,data,Features,FigureNumber,nColor=0,nShades=0):
    """Plot, for every feature, one KDE curve per experiment on a grid of axes.

    A LaTeX "Figure N" caption is displayed first. The curves are drawn on a
    25x25 figure whose grid shape comes from ``get_rows_cols(len(Features))``.
    The per-axis legends are removed and one shared legend is drawn on a
    second, separate figure.

    Parameters
    ----------
    exp : sequence
        Experiment names; each one is matched against ``data['Experiment']``.
    data : DataFrame-like
        Must provide an ``'Experiment'`` column and one column per feature.
    Features : sequence
        Column names to plot, one axis each.
    FigureNumber : int
        Number shown in the caption.
    nColor, nShades : int, optional
        When ``nColor`` is 0 an "hls" palette with ``len(exp)`` colours is
        used; otherwise the palette is ``ChoosePalette(nColor, nShades)``.

    Returns
    -------
    None
    """
    # Raw string: the LaTeX backslashes are not valid Python escape sequences.
    display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
    nrows, ncols = get_rows_cols(len(Features))
    # squeeze=False always returns a 2-D array of axes, so `.flat` below also
    # works when the grid happens to be 1x1.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(25., 25.),
                             dpi=100, squeeze=False)
    fig2, ax2 = plt.subplots(figsize=(10, 6))
    if nColor == 0:
        colors = sns.color_palette("hls", len(exp))
    else:
        colors = ChoosePalette(nColor, nShades)
    for par, ax in zip(Features, axes.flat):
        for name, color in zip(exp, colors):
            vals = np.float64(data[par].loc[data['Experiment'] == name])
            sns.kdeplot(vals, ax=ax, label=name, color=color)
        ax.set_xlabel(par, fontdict={'fontsize': 15})
    # One handle per label, gathered across all axes of the main figure.
    labels_handles = {
        label: handle
        for axis in fig.axes
        for handle, label in zip(*axis.get_legend_handles_labels())
    }
    for a in axes.flat:
        # get_legend() returns None for an axis without a legend; test for it
        # explicitly instead of catching (and hiding) every possible error.
        legend = a.get_legend()
        if legend is not None:
            legend.remove()
    fig2.legend(labels_handles.values(),
                labels_handles.keys(),
                loc='center', fontsize='xx-large',
                framealpha=1, edgecolor='black')
    fig.tight_layout(pad=1.01)
    fig.subplots_adjust(top=0.85)
    plt.show()
    return None
histogramDataKDE(exp, dataSpecGraphN, Features[:-1], FigureNumber,
                 nColor=nColor, nShades=nShades)

###### DO NOT CHANGE THESE: ######
## Columns removed before the table is handed to the analysis
columnsToDrop = [
    'Experiment',
    'ID',
    'TimeIndex',
    'y_Pos',
    'x_Pos',
    'dt',
    'Parent',
]
labelsCol = 'Experiment'  ## Label to Encode
FigureNumber = 3
## number of groups for the k-means analysis
# k_cluster = 3
# Clustergram, PCA, k-means, TimePoints
# Feature table: the bookkeeping columns are always dropped; the morphological
# ones are dropped as well when MorphoOut is set.
if MorphoOut:
    _dropped = MorphoFeatures + columnsToDrop
else:
    _dropped = columnsToDrop
dataDrop = dataSpecGraphN.drop(columns=_dropped).copy()
if MorphoIn:
    # Morphology only; [:-1] skips the last entry of MorphoFeatures.
    dataDrop = dataSpecGraphN[MorphoFeatures[:-1]].copy()

dataLabel = dataSpecGraphN[['Experiment', 'TimeIndex', 'dt', 'y_Pos']].copy()

_tasc_options = dict(
    labelsCol=['Experiment', 'Treatments', 'Layers', 'TimeLayers'],
    LE=[True, False, False, False],
    title=title,
    HC=HC,
    combTreats=combin,
    LY=9,
    TI=3,
    k_cluster=k_cluster,
    multipleCL=multipleCL,
    singleTREAT=singleTREAT,
    FigureNumber=FigureNumber,
    nrows=nrows,
    ncols=ncols,
    nColor=nColor,
    nShades=nShades,
    nColorTreat=nColorTreat,
    nShadesTreat=nShadesTreat,
    nColorLay=nColorLay,
    nShadesLay=nShadesLay,
    figsizeEXP=figsizeEXP,
    figsizeTREATS=figsizeTREATS,
    figsizeCL=figsizeCL,
    Features=Features,
    AE_model=AE_model,
    model_name=model_name,
)
(pca_df, FigureNumber, kmeans_pca, labelsT,
 k_cluster, AE_df, pca) = TASC(dataDrop, dataLabel, **_tasc_options)
## pca_df, FigureNumber, kmeans_pca, labelsT, AE_df
_figure_caption = r'$\color{blue}{\Large Figure\ %i}$'

# Cluster membership against the time index.
display(Latex(_figure_caption % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'TimeIndex'
pca_df[Par] = dataSpecGraphN[Par].values.copy()
histByKmeans(pca_df, Par, k_cluster=k_cluster, bar_width=0.3,
             figsize=(15, 5), rotate=-45)

# Cluster membership against the time layers.
display(Latex(_figure_caption % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'TimeLayers'
histByKmeans(pca_df, 'TimeLayers', k_cluster=k_cluster, bar_width=0.3,
             figsize=(15, 5), labels=labelsT)

display(Latex(r'$\color{blue}{\Large Distribution\ all\ features\ by\ groups}$'))
## k-means groups clustering
Groups = range(k_cluster)
dataSpecGraphN['Groups'] = kmeans_pca['Groups'].copy()
histogramDataKDELabels(Groups, dataSpecGraphN, Features[:-1], FigureNumber,
                       Par='Groups', nColor=0, nShades=0)
FigureNumber = FigureNumber + 1
## Topographic k-means in PCA (all experiments together, one axis)
FigureNumber = 28
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber = FigureNumber + 1
fig1, ax1 = plt.subplots(figsize=(6, 6), dpi=100)
fig2, ax2 = plt.subplots(figsize=(5, 5))
dataSpecGraphN['PC1'] = kmeans_pca['PC1'].copy()
dataSpecGraphN['PC2'] = kmeans_pca['PC2'].copy()
palette = sns.color_palette("hls", len(Groups))

uPar1tmp = kmeans_pca['Groups'].unique()
uPar1tmp.sort()
# One colour per group that is present, picked by the group's position in Groups.
pal = iter(sns.color_palette([palette[Groups.index(value)] for value in uPar1tmp]))
for g in uPar1tmp:
    # The colour is consumed for every group, plotted or not, so the
    # remaining groups keep their own colours.
    _group_color = next(pal)
    _in_group = kmeans_pca['Groups'] == g
    _pc1 = kmeans_pca['PC1'].loc[_in_group]
    # Groups with three points or fewer are not plotted.
    if len(_pc1) > 3:
        sns.kdeplot(_pc1, kmeans_pca['PC2'].loc[_in_group],
                    label='Group ' + str(g), n_levels=5, bw=.5, ax=ax1,
                    color=_group_color, cut=10)

labels_handles1 = {
    label: handle
    for _axis in fig1.axes
    for handle, label in zip(*_axis.get_legend_handles_labels())
}
# The legend is drawn on its own figure.
fig2.legend(labels_handles1.values(),
            labels_handles1.keys(),
            loc='upper right', fontsize='xx-large',
            framealpha=1, edgecolor='black')
plt.show()
## Topographic k-means in PCA (one axis per experiment, nrows x ncols grid)
FigureNumber = 29
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber += 1
fig1, ax1 = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsizeEXP, dpi=100,
                         sharex=True, sharey=True)
fig2, ax2 = plt.subplots(figsize=(5, 5))
dataSpecGraphN['PC1'] = kmeans_pca['PC1'].copy()
dataSpecGraphN['PC2'] = kmeans_pca['PC2'].copy()
dataEXP = dataSpecGraphN.groupby(['Experiment'])
palette = sns.color_palette("hls", len(Groups))
for ex, a1 in zip(exp, ax1.reshape(-1)):
    exp_df = dataEXP.get_group(ex)
    uPar1tmp = exp_df['Groups'].unique()
    uPar1tmp.sort()
    # Colours are picked by each group's position in Groups, so a group keeps
    # the same colour in every experiment.
    pal = iter(sns.color_palette([palette[Groups.index(value)] for value in uPar1tmp]))
    for g in uPar1tmp:
        # Consume the colour even when the group is skipped.
        groupColor = next(pal)
        inGroup = exp_df['Groups'] == g
        # Groups with three points or fewer are not plotted.
        if len(exp_df['PC1'].loc[inGroup]) > 3:
            sns.kdeplot(exp_df['PC1'].loc[inGroup], exp_df['PC2'].loc[inGroup],
                        label='Group ' + str(g), n_levels=5, bw=.5, ax=a1,
                        color=groupColor, cut=10)
    a1.autoscale(enable=True, tight=True)
    a1.set_title(ex)
labels_handles1 = {
    label: handle
    for axis in fig1.axes
    for handle, label in zip(*axis.get_legend_handles_labels())
}
for a in ax1.flat:
    # get_legend() returns None when the axis has no legend; checking for it
    # replaces a bare `except` that would also have hidden unrelated errors.
    legend = a.get_legend()
    if legend is not None:
        legend.remove()
# The shared legend is drawn on its own figure.
fig2.legend(labels_handles1.values(),
            labels_handles1.keys(),
            loc='upper right', fontsize='xx-large',
            framealpha=1, edgecolor='black')
fig2.subplots_adjust(right=0.5)
plt.show()
## Topographic k-means in PCA (same plot on a transposed ncols x nrows grid)
FigureNumber = 29
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber += 1
fig1, ax1 = plt.subplots(nrows=ncols, ncols=nrows, figsize=figsizeEXP[::-1], dpi=100,
                         sharex=True, sharey=True)
fig2, ax2 = plt.subplots(figsize=(5, 5))
dataSpecGraphN['PC1'] = kmeans_pca['PC1'].copy()
dataSpecGraphN['PC2'] = kmeans_pca['PC2'].copy()
dataEXP = dataSpecGraphN.groupby(['Experiment'])
palette = sns.color_palette("hls", len(Groups))
for ex, a1 in zip(exp, ax1.reshape(-1)):
    exp_df = dataEXP.get_group(ex)
    uPar1tmp = exp_df['Groups'].unique()
    uPar1tmp.sort()
    # Colours are picked by each group's position in Groups, so a group keeps
    # the same colour in every experiment.
    pal = iter(sns.color_palette([palette[Groups.index(value)] for value in uPar1tmp]))
    for g in uPar1tmp:
        # Consume the colour even when the group is skipped.
        groupColor = next(pal)
        inGroup = exp_df['Groups'] == g
        # Groups with three points or fewer are not plotted.
        if len(exp_df['PC1'].loc[inGroup]) > 3:
            sns.kdeplot(exp_df['PC1'].loc[inGroup], exp_df['PC2'].loc[inGroup],
                        label='Group ' + str(g), n_levels=5, bw=.5, ax=a1,
                        color=groupColor, cut=10)
    a1.autoscale(enable=True, tight=True)
    # Explicit limits computed from the PCA table and this experiment's rows.
    a1.set_xlim([pca_df['PC1'].min() + 0.1 * exp_df['PC1'].min(),
                 pca_df['PC1'].max() - 0.1 * pca_df['PC1'].max()])
    a1.set_ylim([pca_df['PC2'].min() + 0.1 * exp_df['PC2'].min(),
                 pca_df['PC2'].max() - 0.1 * pca_df['PC2'].max()])
    a1.set_title(ex)
labels_handles1 = {
    label: handle
    for axis in fig1.axes
    for handle, label in zip(*axis.get_legend_handles_labels())
}
for a in ax1.flat:
    # get_legend() returns None when the axis has no legend; checking for it
    # replaces a bare `except` that would also have hidden unrelated errors.
    legend = a.get_legend()
    if legend is not None:
        legend.remove()
fig1.subplots_adjust(right=0.8)
# The shared legend is drawn on its own figure.
fig2.legend(labels_handles1.values(),
            labels_handles1.keys(),
            loc='upper right', fontsize='xx-large')
fig2.subplots_adjust(right=0.5)
plt.show()
FigureNumber = 30
_figure_caption = r'$\color{blue}{\Large Figure\ %i}$'

# Carry the treatment labels over to the feature table and group by them.
dataSpecGraphN['Treatments'] = pca_df['Treatments'].copy()
dataTreat = dataSpecGraphN.groupby(['Treatments'])

display(Latex(_figure_caption % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'Treatments'
lb_make = LabelEncoder()
pca_df['TreatmentsLabels'] = lb_make.fit_transform(pca_df[Par])
uLabelT = list(pca_df['Treatments'].unique())
histByKmeansTreats(pca_df, 'TreatmentsLabels', k_cluster=k_cluster,
                   bar_width=0.3, figsize=(15, 5), labels=uLabelT)

display(Latex(_figure_caption % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'TreatmentsLabels'
labels = list(pca_df.groupby('Treatments').describe().index.values)
histByKmeansTreatsLabel(pca_df, Par='TreatmentsLabels', k_cluster=k_cluster,
                        bar_width=0.3, figsize=(15, 5), labels=labels, rotate=0)

# Chi-square tables against every control name; skipped for a single treatment.
if not singleTREAT:
    display(Latex(r'$\color{blue}{\Large Treatments p-Value}$'))
    labels = list(pca_df.groupby('Treatments').describe().index.values)
    Par = 'Treatments'
    for expectation in CON:
        chi_square_test_tables(pca_df, labels, expectation=expectation, Par=Par)
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'CellLine'
lb_make = LabelEncoder()
pca_df['CellLineLabels'] = lb_make.fit_transform(pca_df[Par])
labels = list(pca_df.groupby('CellLine').describe().index.values)
histByKmeansTreatsLabel(pca_df, Par='CellLineLabels', k_cluster=k_cluster,
                        bar_width=0.3, figsize=(15, 5), labels=labels, rotate=0)

# One chi-square table per cell line; only when several cell lines are analyzed.
if multipleCL:
    for expectation in CL:
        display(Latex(r'$\color{blue}{\Large Cell Line p-Value %s}$' % (expectation)))
        labels = list(pca_df.groupby(Par).describe().index.values)
        chi_square_test_tables(pca_df, labels, expectation=expectation, Par=Par)
_figure_caption = r'$\color{blue}{\Large Figure\ %i}$'

display(Latex(_figure_caption % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'Experiment'
lb_make = LabelEncoder()
pca_df['ExperimentsLabels'] = lb_make.fit_transform(pca_df[Par])
uLabelEXP = list(pca_df['Experiment'].unique())
histByKmeansTreats(pca_df, 'ExperimentsLabels', k_cluster=k_cluster,
                   bar_width=0.3, figsize=(15, 5), labels=uLabelEXP, rotate=90)

pca_df_E = pca_df.groupby('CellLine')
if not singleTREAT:
    labelsC = list(pca_df_E.describe().index.values)
    for cl in CL:
        # Experiments that belong to this cell line.
        labelsE = list(pca_df_E.get_group(cl).groupby(Par).describe().index.values)
        # The first control well whose name contains the cell line is the expectation.
        _matching_wells = [well for well in wellCON if cl in well]
        expectation = _matching_wells[0]
        display(Latex(r'$\color{blue}{\Large Experiments p-Value %s}$' % (expectation)))
        chi_square_test_tables(pca_df, labelsE, expectation=expectation, Par=Par)

# NOTE(review): indentation was lost in this export; the figure below is
# assumed to sit outside the `if` above - confirm against the notebook.
display(Latex(_figure_caption % (FigureNumber)))
FigureNumber = FigureNumber + 1
labels = list(pca_df.groupby('Experiment').describe().index.values)
histByKmeansTreatsLabel(pca_df, Par='ExperimentsLabels', k_cluster=k_cluster,
                        bar_width=0.3, figsize=(15, 5), labels=labels, rotate=90)
## Figure 16,17 (100%)
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber = FigureNumber + 1
Par = 'TimeLayersLabels'
lb_make = LabelEncoder()
pca_df[Par] = lb_make.fit_transform(pca_df['TimeLayers'])
labels = list(pca_df.groupby('TimeLayers').describe().index.values)
histByKmeansTreatsLabel(pca_df, Par=Par, k_cluster=k_cluster, bar_width=0.3,
                        figsize=(15, 5), labels=labels, rotate=0)

# Chi-square table with the first time layer as the expectation.
Par = 'TimeLayers'
expectation = labels[0]
chi_square_test_tables(pca_df, labels, expectation=expectation, Par=Par)
# Longest names first, so that a control whose name contains a shorter
# control name is matched before the shorter one.
controls.sort(key=len, reverse=True)
COMB = []
if not singleCONTROL:
    # One entry per row of pca_df: the first control name found inside the
    # experiment name, or None when no control matches. Always producing an
    # entry keeps COMB the same length as pca_df; skipping unmatched rows
    # made the column assignment below fail with a length mismatch.
    COMB = [
        next((cont for cont in controls if cont in ex), None)
        for ex in pca_df['Experiment']
    ]
    pca_df['CONTROLS'] = COMB
    Par = 'Experiment'
    # groupby leaves out the rows whose CONTROLS value is missing.
    pca_df_C = pca_df.groupby('CONTROLS')
    for cl in controls:
        controlGroup = pca_df_C.get_group(cl)
        labelsE = list(controlGroup.groupby(Par).describe().index.values)
        # The first control well whose name contains this control is the expectation.
        expectation = [well for well in wellCON if cl in well][0]
        display(Latex(r'$\color{blue}{\Large Experiments p-Value %s}$' % (expectation)))
        chi_square_test_tables(controlGroup, labelsE, expectation=expectation, Par=Par)
# Feature distributions by k-means group, one figure per treatment.
# NOTE(review): indentation was lost in this export; the counter increment is
# assumed to be part of the loop body - confirm against the notebook.
_kde_features = Features[:-1]
for treat in uLabelT:
    display(Latex(r'$\color{blue}{\Large Figure\ %s}$' % (treat)))
    _treat_rows = dataTreat.get_group(treat)
    histogramDataKDELabels(Groups, _treat_rows, _kde_features, FigureNumber,
                           Par='Groups', nColor=0, nShades=0)
    FigureNumber = FigureNumber + 1
# TimeIndex against y_Pos, coloured by k-means group, one axis per experiment
# (nrows x ncols grid).
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber += 1
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsizeEXP, dpi=100,
                       sharex=True, sharey=True)
fig2, ax2 = plt.subplots(figsize=(6, 6))
dataEXP = dataSpecGraphN.groupby(['Experiment'])
palette = sns.color_palette("hls", len(Groups))
for ex, a in zip(exp, ax.reshape(-1)):
    exp_df = dataEXP.get_group(ex)
    uPar1tmp = exp_df['Groups'].unique()
    uPar1tmp.sort()
    # Colours are picked by each group's position in Groups, so a group keeps
    # the same colour in every experiment.
    pal = sns.color_palette([palette[Groups.index(value)] for value in uPar1tmp])
    sns.scatterplot(x='TimeIndex', y='y_Pos', hue='Groups', palette=pal,
                    data=exp_df, ax=a)
    a.set_title(ex, fontweight='bold', fontsize=15)
    a.autoscale(enable=True, tight=True)
labels_handles = {
    label: handle
    for axis in fig.axes
    for handle, label in zip(*axis.get_legend_handles_labels())
}
fig.tight_layout(pad=1.02)
for a in ax.flat:
    # get_legend() returns None when the axis has no legend; checking for it
    # replaces a bare `except` that would also have hidden unrelated errors.
    legend = a.get_legend()
    if legend is not None:
        legend.remove()
# The shared legend uses the handles collected from THIS figure. It used to
# read `labels_handles1`, a leftover from the topographic KDE cell, so the
# legend showed another figure's artists (or failed if that cell had not run).
fig2.legend(labels_handles.values(),
            labels_handles.keys(),
            loc='upper right', fontsize='xx-large')
fig2.subplots_adjust(right=0.5)
plt.show()
# TimeIndex against y_Pos, coloured by k-means group, one axis per experiment
# (transposed ncols x nrows grid).
display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
FigureNumber += 1
fig, ax = plt.subplots(nrows=ncols, ncols=nrows, figsize=figsizeEXP[::-1], dpi=100,
                       sharex=True, sharey=True)
fig2, ax2 = plt.subplots(figsize=(6, 6))
dataEXP = dataSpecGraphN.groupby(['Experiment'])
palette = sns.color_palette("hls", len(Groups))
for ex, a in zip(exp, ax.reshape(-1)):
    exp_df = dataEXP.get_group(ex)
    uPar1tmp = exp_df['Groups'].unique()
    uPar1tmp.sort()
    # Colours are picked by each group's position in Groups, so a group keeps
    # the same colour in every experiment.
    pal = sns.color_palette([palette[Groups.index(value)] for value in uPar1tmp])
    sns.scatterplot(x='TimeIndex', y='y_Pos', hue='Groups', palette=pal,
                    data=exp_df, ax=a)
    a.set_title(ex, fontweight='bold', fontsize=15)
    a.autoscale(enable=True, tight=True)
labels_handles = {
    label: handle
    for axis in fig.axes
    for handle, label in zip(*axis.get_legend_handles_labels())
}
fig.tight_layout(pad=1.02)
for a in ax.flat:
    # get_legend() returns None when the axis has no legend; checking for it
    # replaces a bare `except` that would also have hidden unrelated errors.
    legend = a.get_legend()
    if legend is not None:
        legend.remove()
# The shared legend uses the handles collected from THIS figure. It used to
# read `labels_handles1`, a leftover from the topographic KDE cell, so the
# legend showed another figure's artists (or failed if that cell had not run).
fig2.legend(labels_handles.values(),
            labels_handles.keys(),
            loc='upper right', fontsize='xx-large')
fig2.subplots_adjust(right=0.5)
plt.show()
# Working copy of the feature table that carries the k-means group of every row.
dataSpecGraphGroups = dataSpecGraphN.copy()
dataSpecGraphGroups['Groups'] = kmeans_pca['Groups'].copy()

display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
display(Latex(r'$\color{blue}{\Large Descriptive\ Table}$'))
DescriptiveTable(dataSpecGraphGroups, path + title)

display(Latex(r'$\color{blue}{\Large Figure\ %i}$' % (FigureNumber)))
display(Latex(r'$\color{blue}{\Large ANOVA\ -\ OneWay}$'))
# Cast every column that can be cast to float64; the names of the columns that
# cannot are printed and those columns are left untouched.
for col in dataSpecGraphGroups.columns:
    try:
        dataSpecGraphGroups[col] = np.float64(dataSpecGraphGroups[col])
    except (TypeError, ValueError):
        # Only conversion failures are expected here; anything else (including
        # a keyboard interrupt) is no longer swallowed by a bare `except`.
        print(col)
ANOVA_TABLE(dataSpecGraphGroups, Features, path + title, dep='Groups')
_figure_caption = r'$\color{blue}{\Large Figure\ %i}$'

display(Latex(_figure_caption % (FigureNumber)))
display(Latex(r'$\color{blue}{\Large ANOVA\ -\ OneWay}$'))
dataSpecGraphGroups['CellLine'] = pca_df['CellLine'].copy()
dataSpecGraphGroupsCL = dataSpecGraphGroups.groupby('CellLine')
# One ANOVA table per cell line; only when several cell lines are analyzed.
if multipleCL:
    for cl in CL:
        display(Latex(r'$\color{blue}{\Large ANOVA\ -\ OneWay\ %s}$' % (cl)))
        _cl_rows = dataSpecGraphGroupsCL.get_group(cl)
        ANOVA_TABLE(_cl_rows, Features, path + title + ' ' + cl, dep='Groups')

# dataSpecGraphGroups is rebuilt from the feature table here; the per-cell-line
# grouping created above is left as it is and is the one used below.
dataSpecGraphGroups = dataSpecGraphN.copy()
dataSpecGraphGroups['Groups'] = kmeans_pca['Groups'].copy()
display(Latex(_figure_caption % (FigureNumber)))
display(Latex(r'$\color{blue}{\Large Descriptive\ Table}$'))
if multipleCL:
    for cl in CL:
        display(Latex(r'$\color{blue}{\Large Descriptive\ Table\ %s}$' % (cl)))
        _cl_rows = dataSpecGraphGroupsCL.get_group(cl)
        DescriptiveTable(_cl_rows, path + title + ' ' + cl)
# Save the analysis results next to the input pickles as "<title>split.pickle".
_results = [
    pca_df, FigureNumber, kmeans_pca, labelsT, k_cluster, AE_df,
    dataSpecGraphN, dataEXP, Groups, Features, exp, pca,
]
_results_file = path + title + 'split.pickle'
with open(_results_file, 'wb') as f:
    pickle.dump(_results, f)